load libraries
library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Warning: package 'scales' was built under R version 3.3.2
## Loading required package: grid
library(nnet)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(GGally)
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(plotROC)
library(knitr)
read data
total = read.csv('totale.csv', stringsAsFactors = F)
argenine = read.csv('argenine-totale.csv', stringsAsFactors = F)
total_noSeg = read.csv("total_data.csv", stringsAsFactors = F)
total_noSeg$argenine= argenine$argenine
total_noSeg$health2[total_noSeg$health2==1]= 'ILL'
total_noSeg$health2[total_noSeg$health2==2]= 'Healthy'
total_noSeg$recoddiseas2[is.na(total_noSeg$recoddiseas2)]= 0
{r} # hist((total_noSeg$aspartate- mean(total_noSeg$aspartate))/sqrt(var(total_noSeg$aspartate)), breaks = 20) # # shapiro.test((total_noSeg$aspartate- mean(total_noSeg$aspartate))/sqrt(var(total_noSeg$aspartate))) # # hist(total_noSeg$glutamate, breaks = 20) # # hist(total_noSeg$citrolline, breaks = 20) #
seperate data to ill and healthy
total_noSeg %>% filter(health2=='ILL')-> ill
## Warning: package 'bindrcpp' was built under R version 3.3.2
## Warning: `as_dictionary()` is soft-deprecated as of rlang 0.3.0.
## Please use `as_data_pronoun()` instead
## This warning is displayed once per session.
## Warning: `new_overscope()` is soft-deprecated as of rlang 0.2.0.
## Please use `new_data_mask()` instead
## This warning is displayed once per session.
## Warning: The `parent` argument of `new_data_mask()` is deprecated.
## The parent of the data mask is determined from either:
##
## * The `env` argument of `eval_tidy()`
## * Quosure environments when applicable
## This warning is displayed once per session.
## Warning: `overscope_clean()` is soft-deprecated as of rlang 0.2.0.
## This warning is displayed once per session.
total_noSeg %>% filter(health2=='Healthy')-> healthy
pair-wise scatterplot
ggpairs(data=total_noSeg, # data.frame with variables
mapping = ggplot2::aes(colour = as.factor(health2)),
lower = list(continuous = wrap("smooth", method = "lm")),
columns=6:8, # columns to plot, default to all.
title=("Vitiligo data") # title of the plot
)

correlation of amino acids in ill and healthy groups
ggcorr(total_noSeg[, c(6:27)], palette = "RdBu", label = F)

PCA(without proportions)
pca = prcomp(total_noSeg[, 6:27], scale. = T )
eigv = round(pca$sdev^2/sum(pca$sdev^2)*100, 2)
eigv = data.frame(c(1:22),eigv)
names(eigv) = c('PCs','Variance')
# plot(eigv,type = "b",col = "darkblue",lwd = 2);grid()
PCA = pca$sdev^2
names(PCA) = paste0('PC', eigv$PCs)
qcc::pareto.chart(PCA)

##
## Pareto chart analysis for PCA
## Frequency Cum.Freq. Percentage Cum.Percent.
## PC1 7.84117244 7.841172 35.6416929 35.64169
## PC2 3.67095704 11.512129 16.6861684 52.32786
## PC3 1.75973707 13.271867 7.9988048 60.32667
## PC4 1.22317726 14.495044 5.5598966 65.88656
## PC5 1.03993471 15.534979 4.7269760 70.61354
## PC6 0.93403939 16.469018 4.2456336 74.85917
## PC7 0.82609387 17.295112 3.7549721 78.61414
## PC8 0.75455680 18.049669 3.4298037 82.04395
## PC9 0.66718557 18.716854 3.0326617 85.07661
## PC10 0.49948225 19.216336 2.2703739 87.34698
## PC11 0.49321390 19.709550 2.2418814 89.58887
## PC12 0.43796486 20.147515 1.9907494 91.57961
## PC13 0.39855576 20.546071 1.8116171 93.39123
## PC14 0.30454543 20.850616 1.3842974 94.77553
## PC15 0.26097433 21.111591 1.1862470 95.96178
## PC16 0.23876983 21.350361 1.0853174 97.04709
## PC17 0.18794711 21.538308 0.8543050 97.90140
## PC18 0.15869163 21.696999 0.7213256 98.62272
## PC19 0.11975295 21.816752 0.5443316 99.16706
## PC20 0.07603278 21.892785 0.3456035 99.51266
## PC21 0.06150454 21.954290 0.2795661 99.79223
## PC22 0.04571047 22.000000 0.2077749 100.00000
biplot(pca, cex= 0.5)

Classification using PC1 and PC2: health2
ggbiplot(pca, obs.scale = 1, var.scale = 1,
groups = total_noSeg$health2, ellipse = TRUE, circle = TRUE) +
scale_color_discrete(name = '') +
theme(legend.direction = 'horizontal', legend.position = 'top')

Classification using PC1 and PC2: nonsegmentale
ggbiplot(pca, obs.scale = 1, var.scale = 1,
groups = as.factor(total_noSeg$nonsegmentale), ellipse = TRUE, circle = TRUE) +
scale_color_discrete(name = '') +
theme(legend.direction = 'horizontal', legend.position = 'top')

PCA(with proportions)
pca = prcomp(total_noSeg[, 6:45], scale. = T )
eigv = round(pca$sdev^2/sum(pca$sdev^2)*100, 2)
eigv = data.frame(c(1:40),eigv)
names(eigv) = c('PCs','Variance')
# plot(eigv,type = "b",col = "darkblue",lwd = 2);grid()
PCA = pca$sdev^2
names(PCA) = paste0('PC', eigv$PCs)
qcc::pareto.chart(PCA)

##
## Pareto chart analysis for PCA
## Frequency Cum.Freq. Percentage Cum.Percent.
## PC1 8.730764084 8.730764 21.826910211 21.82691
## PC2 5.968581762 14.699346 14.921454404 36.74836
## PC3 3.925588279 18.624934 9.813970698 46.56234
## PC4 3.088183995 21.713118 7.720459989 54.28280
## PC5 2.760456722 24.473575 6.901141806 61.18394
## PC6 2.371001651 26.844576 5.927504127 67.11144
## PC7 1.652114370 28.496691 4.130285924 71.24173
## PC8 1.431774832 29.928466 3.579437079 74.82116
## PC9 1.245777010 31.174243 3.114442524 77.93561
## PC10 1.025514306 32.199757 2.563785765 80.49939
## PC11 0.956852902 33.156610 2.392132256 82.89152
## PC12 0.871254883 34.027865 2.178137207 85.06966
## PC13 0.704561125 34.732426 1.761402813 86.83106
## PC14 0.674038678 35.406465 1.685096694 88.51616
## PC15 0.639180716 36.045645 1.597951789 90.11411
## PC16 0.529362576 36.575008 1.323406440 91.43752
## PC17 0.481647839 37.056656 1.204119598 92.64164
## PC18 0.411155933 37.467812 1.027889832 93.66953
## PC19 0.376511206 37.844323 0.941278016 94.61081
## PC20 0.337844446 38.182167 0.844611116 95.45542
## PC21 0.300062556 38.482230 0.750156390 96.20557
## PC22 0.254954516 38.737184 0.637386289 96.84296
## PC23 0.247645273 38.984830 0.619113183 97.46207
## PC24 0.176469708 39.161299 0.441174271 97.90325
## PC25 0.161531791 39.322831 0.403829477 98.30708
## PC26 0.122199299 39.445030 0.305498248 98.61258
## PC27 0.106995231 39.552026 0.267488078 98.88006
## PC28 0.094485092 39.646511 0.236212730 99.11628
## PC29 0.077503553 39.724014 0.193758882 99.31004
## PC30 0.065377214 39.789392 0.163443035 99.47348
## PC31 0.046753862 39.836145 0.116884655 99.59036
## PC32 0.042576305 39.878722 0.106440761 99.69680
## PC33 0.038084729 39.916806 0.095211822 99.79202
## PC34 0.032193511 39.949000 0.080483777 99.87250
## PC35 0.018120596 39.967121 0.045301490 99.91780
## PC36 0.012985963 39.980107 0.032464907 99.95027
## PC37 0.007234250 39.987341 0.018085624 99.96835
## PC38 0.005564460 39.992905 0.013911150 99.98226
## PC39 0.004678498 39.997584 0.011696245 99.99396
## PC40 0.002416279 40.000000 0.006040698 100.00000
biplot(pca, cex= 0.5)

Classification using PC1 and PC2: health2
ggbiplot(pca, obs.scale = 1, var.scale = 1,
groups = total_noSeg$health2, ellipse = TRUE, circle = TRUE) +
scale_color_discrete(name = '') +
theme(legend.direction = 'horizontal', legend.position = 'top')

Classification using PC1 and PC2: nonsegmentale
ggbiplot(pca, obs.scale = 1, var.scale = 1,
groups = as.factor(total_noSeg$nonsegmentale), ellipse = TRUE, circle = TRUE) +
scale_color_discrete(name = '') +
theme(legend.direction = 'horizontal', legend.position = 'top')

Mann-Whitney test on all the columns(both aminoacids and proportions) vs health2
ill_healthy_mw=as.data.frame(sapply(total_noSeg[, c(6:45)], function(x)
wilcox.test(x~total_noSeg$health2)
) )
ill_healthy_mw = rbind(ill_healthy_mw , adjusted_p.value_hochberg = p.adjust(unlist(ill_healthy_mw[3,]), method = 'hochberg'))
ill_healthy_mw = rbind(ill_healthy_mw , adjusted_p.value_fdr = p.adjust(unlist(ill_healthy_mw[3,]), method = 'fdr'))
ill_healthy_mw = rbind(ill_healthy_mw, ill_mean =colMeans(total_noSeg[total_noSeg$health2=='ILL', 6:45]) )
ill_healthy_mw = rbind(ill_healthy_mw, healthy_mean =colMeans(total_noSeg[total_noSeg$health2=='Healthy', 6:45]) )
kable(ill_healthy_mw)
| statistic |
294 |
373 |
614 |
520 |
750 |
818 |
626 |
864 |
481 |
453 |
1052 |
304 |
199 |
520 |
740 |
322 |
443 |
548 |
568 |
1020 |
1085 |
262 |
870 |
688 |
804 |
775 |
998 |
796 |
1051 |
809 |
835 |
765 |
636 |
991 |
705 |
1119 |
1067 |
547 |
676 |
958 |
| parameter |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
| p.value |
7.470903e-05 |
0.002892484 |
0.8682826 |
0.2131406 |
0.1664351 |
0.02941469 |
0.9771904 |
0.006423474 |
0.08963578 |
0.04288233 |
2.871616e-07 |
0.0001262896 |
1.696178e-07 |
0.2131406 |
0.2047685 |
0.000309769 |
0.03215244 |
0.3563652 |
0.4885482 |
2.685578e-06 |
2.102398e-08 |
1.215316e-05 |
0.005149363 |
0.5030627 |
0.0441031 |
0.09412582 |
1.077071e-05 |
0.05494843 |
3.092354e-07 |
0.03827753 |
0.01735733 |
0.1193018 |
0.9407409 |
1.637282e-05 |
0.387128 |
9.531111e-10 |
9.128393e-08 |
0.3504003 |
0.5946418 |
0.0001026077 |
| null.value |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| alternative |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
| method |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
| data.name |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
x by total_noSeg$health2 |
| adjusted_p.value_hochberg |
0.002241271 |
0.07520457 |
0.9771904 |
0.9771904 |
0.9771904 |
0.6471231 |
0.9771904 |
0.1541634 |
0.9771904 |
0.7938558 |
1.033782e-05 |
0.003536108 |
6.275859e-06 |
0.9771904 |
0.9771904 |
0.008363764 |
0.6752013 |
0.9771904 |
0.9771904 |
9.130966e-05 |
8.199353e-07 |
0.000388901 |
0.1287341 |
0.9771904 |
0.7938558 |
0.9771904 |
0.0003554335 |
0.9341233 |
1.082324e-05 |
0.7655506 |
0.3992185 |
0.9771904 |
0.9771904 |
0.0005075574 |
0.9771904 |
3.812444e-08 |
3.468789e-06 |
0.9771904 |
0.9771904 |
0.002975622 |
| adjusted_p.value_fdr |
0.0002716692 |
0.00771329 |
0.9139817 |
0.2750201 |
0.2377644 |
0.06192566 |
0.9771904 |
0.01511406 |
0.1434173 |
0.07670104 |
2.061569e-06 |
0.0003885833 |
1.696178e-06 |
0.2750201 |
0.2750201 |
0.0008850544 |
0.06430489 |
0.4319578 |
0.5583408 |
1.534616e-05 |
4.204796e-07 |
5.401403e-05 |
0.01287341 |
0.5589585 |
0.07670104 |
0.144809 |
5.385356e-05 |
0.09158071 |
2.061569e-06 |
0.07290958 |
0.03857183 |
0.1767434 |
0.9648624 |
6.549127e-05 |
0.4554448 |
3.812444e-08 |
1.217119e-06 |
0.4319578 |
0.642856 |
0.0003420255 |
| ill_mean |
4.052947 |
25.71046 |
34.53927 |
65.11522 |
446.2532 |
347.6143 |
90.41223 |
41.24307 |
23.52489 |
317.1558 |
52.79352 |
59.25787 |
240.2853 |
180.0727 |
23.17314 |
37.60369 |
39.23904 |
89.37466 |
75.96696 |
39.18057 |
105.1448 |
153.963 |
0.6754138 |
4.833879 |
4.285784 |
0.5466366 |
2.819966 |
0.6376018 |
0.4019113 |
0.145391 |
1.044568 |
20.88475 |
13.68517 |
1.873045 |
0.6207717 |
1.38737 |
0.176677 |
1.466339 |
0.3726656 |
0.3249112 |
| healthy_mean |
3.124116 |
16.89225 |
35.607 |
60.76664 |
499.0092 |
433.6578 |
91.01064 |
50.49853 |
21.41336 |
276.4907 |
76.72541 |
45.13108 |
106.5219 |
160.3457 |
24.21595 |
30.74202 |
34.33406 |
82.61671 |
71.83639 |
60.22598 |
156.3828 |
111.1731 |
0.8031685 |
4.699328 |
4.57674 |
0.5917922 |
3.650282 |
0.688878 |
0.5786692 |
0.1591727 |
1.195207 |
23.70645 |
14.02739 |
2.731364 |
0.6419407 |
2.311366 |
0.296098 |
1.388073 |
0.386422 |
0.5218794 |
kolmogorov-smirnov test on all the columns(both aminoacids and proportions) vs health2
ill_healthy_ks=as.data.frame(sapply(total_noSeg[, c(6:45)], function(x)
ks.test(x[total_noSeg$health2=='ILL'], x[total_noSeg$health2=='Healthy'])
))
ill_healthy_ks = rbind(ill_healthy_ks , adjusted_p.value_hoshberg = p.adjust(unlist(ill_healthy_ks[2,]), method = 'hochberg'))
ill_healthy_ks = rbind(ill_healthy_ks , adjusted_p.value_fdr = p.adjust(unlist(ill_healthy_ks[2,]), method = 'fdr'))
ill_healthy_ks = rbind(ill_healthy_ks, ill_mean =colMeans(total_noSeg[total_noSeg$health2=='ILL', 6:45]) )
ill_healthy_ks = rbind(ill_healthy_ks, healthy_mean =colMeans(total_noSeg[total_noSeg$health2=='Healthy', 6:45]) )
kable(ill_healthy_ks)
| statistic |
0.4721781 |
0.4570747 |
0.13593 |
0.2416534 |
0.2440382 |
0.2535771 |
0.145469 |
0.4324324 |
0.2559618 |
0.3076312 |
0.6073132 |
0.4753577 |
0.6931638 |
0.1995231 |
0.1891892 |
0.4944356 |
0.2686804 |
0.1868045 |
0.1677266 |
0.5039746 |
0.627186 |
0.5015898 |
0.3449921 |
0.2265501 |
0.4252782 |
0.3418124 |
0.4602544 |
0.3052464 |
0.600159 |
0.2710652 |
0.4205087 |
0.2662957 |
0.1414944 |
0.5580286 |
0.182035 |
0.7154213 |
0.6025437 |
0.2074722 |
0.2265501 |
0.445151 |
| p.value |
0.000391774 |
0.000691112 |
0.8323904 |
0.2061064 |
0.1978191 |
0.1658162 |
0.7835548 |
0.001592514 |
0.1579749 |
0.05372972 |
1.219774e-06 |
0.0003315605 |
1.021109e-08 |
0.4112498 |
0.4685635 |
0.0001759579 |
0.1231664 |
0.4864487 |
0.6204319 |
0.0001135205 |
4.947073e-07 |
0.0001249568 |
0.02095612 |
0.2685011 |
0.002019555 |
0.02290865 |
0.0006351303 |
0.05696566 |
1.927058e-06 |
0.1179892 |
0.002367018 |
0.1286008 |
0.8023009 |
1.100577e-05 |
0.5241761 |
3.439466e-09 |
1.710893e-06 |
0.3656205 |
0.2685011 |
0.001035115 |
| alternative |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
two-sided |
| method |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
Two-sample Kolmogorov-Smirnov test |
| data.name |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
x[total_noSeg$health2 == “ILL”] and x[total_noSeg$health2 == “Healthy”] |
| adjusted_p.value_hoshberg |
0.01136145 |
0.01866002 |
0.8323904 |
0.8323904 |
0.8323904 |
0.8323904 |
0.8323904 |
0.03981285 |
0.8323904 |
0.8323904 |
4.513165e-05 |
0.009946816 |
3.982327e-07 |
0.8323904 |
0.8323904 |
0.005454695 |
0.8323904 |
0.8323904 |
0.8323904 |
0.003746177 |
1.879888e-05 |
0.003998618 |
0.4610347 |
0.8323904 |
0.04846931 |
0.4810816 |
0.01778365 |
0.8323904 |
6.744703e-05 |
0.8323904 |
0.05444141 |
0.8323904 |
0.8323904 |
0.0003741963 |
0.8323904 |
1.375786e-07 |
6.159215e-05 |
0.8323904 |
0.8323904 |
0.026913 |
| adjusted_p.value_fdr |
0.001305913 |
0.001974606 |
0.8323904 |
0.2842847 |
0.2825988 |
0.2456536 |
0.8228727 |
0.003981285 |
0.2430384 |
0.1023423 |
1.219774e-05 |
0.001205675 |
2.042219e-07 |
0.4984846 |
0.5512511 |
0.0007038316 |
0.2052773 |
0.5559413 |
0.6707372 |
0.0005553636 |
6.596097e-06 |
0.0005553636 |
0.04411815 |
0.3464531 |
0.004751893 |
0.04581729 |
0.001954247 |
0.1035739 |
1.284705e-05 |
0.2051986 |
0.00526004 |
0.2057613 |
0.8228727 |
6.289013e-05 |
0.5824179 |
1.375786e-07 |
1.284705e-05 |
0.4570257 |
0.3464531 |
0.002760308 |
| ill_mean |
4.052947 |
25.71046 |
34.53927 |
65.11522 |
446.2532 |
347.6143 |
90.41223 |
41.24307 |
23.52489 |
317.1558 |
52.79352 |
59.25787 |
240.2853 |
180.0727 |
23.17314 |
37.60369 |
39.23904 |
89.37466 |
75.96696 |
39.18057 |
105.1448 |
153.963 |
0.6754138 |
4.833879 |
4.285784 |
0.5466366 |
2.819966 |
0.6376018 |
0.4019113 |
0.145391 |
1.044568 |
20.88475 |
13.68517 |
1.873045 |
0.6207717 |
1.38737 |
0.176677 |
1.466339 |
0.3726656 |
0.3249112 |
| healthy_mean |
3.124116 |
16.89225 |
35.607 |
60.76664 |
499.0092 |
433.6578 |
91.01064 |
50.49853 |
21.41336 |
276.4907 |
76.72541 |
45.13108 |
106.5219 |
160.3457 |
24.21595 |
30.74202 |
34.33406 |
82.61671 |
71.83639 |
60.22598 |
156.3828 |
111.1731 |
0.8031685 |
4.699328 |
4.57674 |
0.5917922 |
3.650282 |
0.688878 |
0.5786692 |
0.1591727 |
1.195207 |
23.70645 |
14.02739 |
2.731364 |
0.6419407 |
2.311366 |
0.296098 |
1.388073 |
0.386422 |
0.5218794 |
kruskal-Wallis test on all columns vs nonsegmentale(vosate lake)
nonsegmental_kw = as.data.frame(sapply(total_noSeg[, c(6:45)], function(x)
kruskal.test(x~total_noSeg$nonsegmentale)))
nonsegmental_kw = rbind(nonsegmental_kw , adjusted_p.value_hochberg = p.adjust(unlist(nonsegmental_kw[3,]), method = 'hochberg'))
nonsegmental_kw = rbind(nonsegmental_kw , adjusted_p.value_fdr = p.adjust(unlist(nonsegmental_kw[3,]), method = 'fdr'))
nonsegmental_kw = rbind(nonsegmental_kw, control_mean =colMeans(total_noSeg[total_noSeg$nonsegmentale==0, 6:45]) )
nonsegmental_kw = rbind(nonsegmental_kw, khafif_mean =colMeans(total_noSeg[total_noSeg$nonsegmentale==1, 6:45]) )
nonsegmental_kw = rbind(nonsegmental_kw, shadid_mean =colMeans(total_noSeg[total_noSeg$nonsegmentale==2, 6:45]) )
kable(nonsegmental_kw)
| statistic |
17.56103 |
16.33092 |
0.1847818 |
2.341535 |
2.412248 |
4.904904 |
0.01178877 |
7.581861 |
6.681908 |
4.156372 |
24.50807 |
17.01707 |
24.49691 |
4.818431 |
4.649657 |
13.29306 |
6.617138 |
5.594535 |
8.153247 |
21.49123 |
28.72841 |
18.57233 |
7.726989 |
0.5130945 |
6.48968 |
7.247254 |
19.33717 |
3.710166 |
23.5963 |
4.891572 |
10.30448 |
2.598715 |
0.04063286 |
19.79508 |
3.3545 |
31.82358 |
25.8012 |
1.430741 |
0.358777 |
14.5681 |
| parameter |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
2 |
| p.value |
0.000153699 |
0.0002843062 |
0.9117487 |
0.3101289 |
0.2993553 |
0.08608227 |
0.994123 |
0.02257458 |
0.03540318 |
0.1251571 |
4.765857e-06 |
0.0002017397 |
4.792505e-06 |
0.08988577 |
0.09780024 |
0.001298518 |
0.03656847 |
0.06097646 |
0.01696465 |
2.15396e-05 |
5.777035e-07 |
9.269788e-05 |
0.0209945 |
0.7737184 |
0.0389748 |
0.02668572 |
6.323938e-05 |
0.1564399 |
7.51847e-06 |
0.086658 |
0.00578643 |
0.2727069 |
0.9798886 |
5.029832e-05 |
0.1868872 |
1.229131e-07 |
2.496548e-06 |
0.4890109 |
0.8357811 |
0.0006864012 |
| method |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
Kruskal-Wallis rank sum test |
| data.name |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
x by total_noSeg$nonsegmentale |
| adjusted_p.value_hochberg |
0.004610969 |
0.007960573 |
0.994123 |
0.994123 |
0.994123 |
0.994123 |
0.994123 |
0.4966408 |
0.6948009 |
0.994123 |
0.0001725302 |
0.00585045 |
0.0001725302 |
0.994123 |
0.994123 |
0.03376146 |
0.6948009 |
0.994123 |
0.4071516 |
0.0007323464 |
2.253044e-05 |
0.002873634 |
0.4828736 |
0.994123 |
0.7015464 |
0.5604 |
0.00202366 |
0.994123 |
0.0002631464 |
0.994123 |
0.1446607 |
0.994123 |
0.994123 |
0.001659845 |
0.994123 |
4.916524e-06 |
9.486881e-05 |
0.994123 |
0.994123 |
0.01853283 |
| adjusted_p.value_fdr |
0.0005589053 |
0.0008747882 |
0.9597354 |
0.3648575 |
0.362855 |
0.1331641 |
0.994123 |
0.04752544 |
0.06648812 |
0.1726304 |
3.834004e-05 |
0.0006724655 |
3.834004e-05 |
0.1331641 |
0.1397146 |
0.003462714 |
0.06648812 |
0.1016274 |
0.03991682 |
0.0001230834 |
1.155407e-05 |
0.0003707915 |
0.04665445 |
0.8596871 |
0.06778226 |
0.05337143 |
0.0002810639 |
0.2085866 |
5.012313e-05 |
0.1331641 |
0.01446607 |
0.3408837 |
0.994123 |
0.0002514916 |
0.2411448 |
4.916524e-06 |
3.32873e-05 |
0.5588696 |
0.9035472 |
0.001961146 |
| control_mean |
3.124116 |
16.89225 |
35.607 |
60.76664 |
499.0092 |
433.6578 |
91.01064 |
50.49853 |
21.41336 |
276.4907 |
76.72541 |
45.13108 |
106.5219 |
160.3457 |
24.21595 |
30.74202 |
34.33406 |
82.61671 |
71.83639 |
60.22598 |
156.3828 |
111.1731 |
0.8031685 |
4.699328 |
4.57674 |
0.5917922 |
3.650282 |
0.688878 |
0.5786692 |
0.1591727 |
1.195207 |
23.70645 |
14.02739 |
2.731364 |
0.6419407 |
2.311366 |
0.296098 |
1.388073 |
0.386422 |
0.5218794 |
| khafif_mean |
4.351811 |
31.83114 |
33.82565 |
62.58102 |
452.0396 |
368.7695 |
90.92358 |
41.08363 |
25.23839 |
316.9333 |
55.10125 |
62.89814 |
241.4277 |
191.2362 |
24.36319 |
38.52066 |
41.42105 |
98.41708 |
83.019 |
42.34168 |
112.1734 |
155.1452 |
0.6725561 |
4.747437 |
4.472902 |
0.6002664 |
2.930733 |
0.6390026 |
0.3970949 |
0.1435599 |
1.164709 |
20.81183 |
13.96961 |
2.114185 |
0.5712807 |
1.376055 |
0.1845871 |
1.45169 |
0.3845713 |
0.32802 |
| shadid_mean |
3.737478 |
19.24975 |
35.29254 |
67.79022 |
440.1453 |
325.2838 |
89.87248 |
41.41137 |
21.7162 |
317.3907 |
50.35759 |
55.41536 |
239.0795 |
168.2889 |
21.91696 |
36.63578 |
36.9358 |
79.82987 |
68.52315 |
35.84384 |
97.7258 |
152.715 |
0.6784302 |
4.925123 |
4.08827 |
0.4900274 |
2.703046 |
0.6361233 |
0.4069953 |
0.1473239 |
0.9177522 |
20.96172 |
13.38492 |
1.618508 |
0.6730123 |
1.399314 |
0.1683273 |
1.481802 |
0.3600984 |
0.3216298 |
Mann-Whitney & Kolmogrov-Smirnov test on all columns vs recoddiseas2(modate bimari)
data = total_noSeg %>% filter(recoddiseas2>0)
recoddiseas2_mw = as.data.frame(sapply(data[, c(6:45)], function(x)
wilcox.test(x~data$recoddiseas2)))
recoddiseas2_mw = rbind(recoddiseas2_mw , adjusted_p.value_fdr = p.adjust(unlist(recoddiseas2_mw[3,]), method = 'fdr'))
recoddiseas2_mw = rbind(recoddiseas2_mw , adjusted_p.value_hochberg = p.adjust(unlist(recoddiseas2_mw[3,]), method = 'hochberg'))
recoddiseas2_mw = rbind(recoddiseas2_mw, less_six =colMeans(total_noSeg[total_noSeg$recoddiseas2==1, 6:45] ))
recoddiseas2_mw = rbind(recoddiseas2_mw, more_six =colMeans(total_noSeg[total_noSeg$recoddiseas2==2, 6:45]) )
kable(recoddiseas2_mw)
| statistic |
179 |
172 |
136 |
125 |
135 |
172 |
105 |
184 |
141 |
128 |
158 |
119 |
141 |
120 |
120 |
119 |
118 |
116 |
134 |
148 |
157 |
210 |
189 |
165 |
157 |
150 |
180 |
202 |
190 |
190 |
171 |
196 |
180 |
167 |
172 |
209 |
177 |
173 |
221 |
163 |
| parameter |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
| p.value |
0.8218698 |
0.9880272 |
0.2983262 |
0.168745 |
0.284413 |
0.9880272 |
0.04563677 |
0.7073835 |
0.3744164 |
0.1990715 |
0.7073835 |
0.1184626 |
0.3744164 |
0.1259344 |
0.1259344 |
0.1184626 |
0.1113344 |
0.09807196 |
0.2709349 |
0.4986944 |
0.6851533 |
0.2452772 |
0.5990186 |
0.8688752 |
0.6851533 |
0.537755 |
0.7985884 |
0.3583325 |
0.5782569 |
0.5782569 |
1 |
0.4611587 |
0.7985884 |
0.9163372 |
0.9880272 |
0.2578903 |
0.8688752 |
0.964092 |
0.1337585 |
0.8218698 |
| null.value |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| alternative |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
| method |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
| data.name |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
x by data$recoddiseas2 |
| adjusted_p.value_fdr |
1 |
1 |
0.7955365 |
0.7499777 |
0.7955365 |
1 |
0.6687924 |
1 |
0.8320365 |
0.7955365 |
1 |
0.6687924 |
0.8320365 |
0.6687924 |
0.6687924 |
0.6687924 |
0.6687924 |
0.6687924 |
0.7955365 |
0.9973889 |
1 |
0.7955365 |
0.9983643 |
1 |
1 |
0.9983643 |
1 |
0.8320365 |
0.9983643 |
0.9983643 |
1 |
0.9708604 |
1 |
1 |
1 |
0.7955365 |
1 |
1 |
0.6687924 |
1 |
| adjusted_p.value_hochberg |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| less_six |
4.035562 |
25.69114 |
33.08678 |
60.72593 |
432.7207 |
360.8517 |
81.94685 |
43.29987 |
22.84186 |
301.425 |
52.30318 |
55.12769 |
224.6137 |
161.8015 |
22.13957 |
35.06458 |
36.90545 |
82.90511 |
71.732 |
39.1252 |
106.0426 |
157.5622 |
0.6792645 |
4.644915 |
4.259247 |
0.5233265 |
2.820476 |
0.6589611 |
0.4056015 |
0.1441502 |
1.055709 |
22.14809 |
13.97757 |
2.042163 |
0.6298461 |
1.452841 |
0.1799292 |
1.51121 |
0.403157 |
0.3324435 |
| more_six |
4.071297 |
25.73086 |
36.07246 |
69.74836 |
460.5375 |
333.6415 |
99.34791 |
39.072 |
24.24586 |
333.7605 |
53.31111 |
63.6175 |
256.8275 |
199.3589 |
24.26413 |
40.28387 |
41.70226 |
96.20362 |
80.4372 |
39.23901 |
104.1972 |
150.1638 |
0.6713491 |
5.033341 |
4.313796 |
0.5712417 |
2.819428 |
0.6150559 |
0.3980161 |
0.1467008 |
1.032808 |
19.55123 |
13.37652 |
1.694532 |
0.6111932 |
1.318261 |
0.1732441 |
1.418975 |
0.3404802 |
0.3169604 |
Mann-Whitney & Kolmogrov-Smirnov test on all columns vs familyhistory
data = total_noSeg %>% filter(!is.na(familyhistory))
familyHistory_mw = as.data.frame(sapply(data[, c(6:45)], function(x)
wilcox.test(x~data$familyhistory)))
familyHistory_mw = rbind(familyHistory_mw , adjusted_p.value_hochberg = p.adjust(unlist(familyHistory_mw[3,]), method = 'hochberg'))
familyHistory_mw = rbind(familyHistory_mw , adjusted_p.value_fdr = p.adjust(unlist(familyHistory_mw[3,]), method = 'fdr'))
familyHistory_mw = rbind(familyHistory_mw, no_hist =colMeans(total_noSeg[total_noSeg$familyhistory==0, 6:45], na.rm = T) )
familyHistory_mw = rbind(familyHistory_mw, yes_hist =colMeans(total_noSeg[total_noSeg$familyhistory==1, 6:45], na.rm = T) )
kable(familyHistory_mw)
| statistic |
113 |
107 |
107 |
130 |
137 |
170 |
67 |
132 |
143 |
116 |
156 |
129 |
119 |
134 |
118 |
92 |
124 |
131 |
143 |
165 |
157 |
100 |
145 |
144 |
157 |
206 |
148 |
190 |
147 |
159 |
173 |
198 |
123 |
174 |
151 |
184 |
216 |
142 |
169 |
188 |
| parameter |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
| p.value |
0.178776 |
0.1239551 |
0.1239551 |
0.4223417 |
0.5610465 |
0.6716575 |
0.003845478 |
0.4598452 |
0.6947405 |
0.2119702 |
1 |
0.4042639 |
0.2492676 |
0.4990909 |
0.2363706 |
0.04224361 |
0.3208436 |
0.4408712 |
0.6947405 |
0.7896609 |
0.9874847 |
0.07727561 |
0.7417229 |
0.7181022 |
0.9874847 |
0.1162177 |
0.813936 |
0.2907876 |
0.7896609 |
0.9374838 |
0.6042625 |
0.1893946 |
0.3055774 |
0.5824727 |
0.8877215 |
0.3866445 |
0.05764172 |
0.6716575 |
0.6947405 |
0.3208436 |
| null.value |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| alternative |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
| method |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
| data.name |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
x by data$familyhistory |
| adjusted_p.value_hochberg |
1 |
1 |
1 |
1 |
1 |
1 |
0.1538191 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| adjusted_p.value_fdr |
0.8021091 |
0.7083147 |
0.7083147 |
0.8758956 |
0.9263206 |
0.9263206 |
0.1538191 |
0.8758956 |
0.9263206 |
0.8021091 |
1 |
0.8758956 |
0.8021091 |
0.9074381 |
0.8021091 |
0.7083147 |
0.8021091 |
0.8758956 |
0.9263206 |
0.9290128 |
1 |
0.7083147 |
0.9271537 |
0.9265835 |
1 |
0.7083147 |
0.9302126 |
0.8021091 |
0.9290128 |
1 |
0.9263206 |
0.8021091 |
0.8021091 |
0.9263206 |
0.9863572 |
0.8758956 |
0.7083147 |
0.9263206 |
0.9263206 |
0.8021091 |
| no_hist |
3.834042 |
23.9139 |
32.5948 |
62.12807 |
439.5452 |
349.563 |
82.02316 |
37.29338 |
23.29817 |
307.0609 |
52.41527 |
57.80956 |
226.8009 |
179.0759 |
22.48374 |
35.32728 |
38.20443 |
86.46151 |
75.11564 |
38.67737 |
104.3731 |
143.6107 |
0.6728893 |
4.826842 |
4.320725 |
0.5675903 |
2.824321 |
0.6507367 |
0.3993928 |
0.149516 |
1.087842 |
22.41097 |
13.22891 |
1.932903 |
0.6174791 |
1.426197 |
0.1871778 |
1.472401 |
0.3756081 |
0.3463424 |
| yes_hist |
4.457078 |
29.02719 |
38.12906 |
70.62997 |
458.6372 |
344.0167 |
105.8998 |
48.53481 |
23.94345 |
335.7926 |
53.49183 |
61.93166 |
265.1796 |
181.9129 |
24.44587 |
41.80631 |
41.14907 |
94.75278 |
77.53863 |
40.10955 |
106.5697 |
173.0748 |
0.6800743 |
4.846869 |
4.221278 |
0.5079527 |
2.811926 |
0.6133527 |
0.4065609 |
0.1377757 |
0.9646776 |
18.06712 |
14.52749 |
1.762538 |
0.6268505 |
1.315689 |
0.1572908 |
1.455148 |
0.3672332 |
0.2853461 |
# familyHistory_ks = as.data.frame(sapply(data[, 6:45], function(x)
# ks.test(x~data$familyhistory)))
Mann-Whitney & Kolmogrov-Smirnov test on all columns vs newlesion
data = total_noSeg %>% filter(!is.na(newlesion))
newlesion_mw = as.data.frame(sapply(data[, c(6:45)], function(x)
wilcox.test(x~data$newlesion)))
newlesion_mw = rbind(newlesion_mw , adjusted_p.value_fdr = p.adjust(unlist(newlesion_mw[3,]), method = 'fdr'))
newlesion_mw = rbind(newlesion_mw , adjusted_p.value_hochberg = p.adjust(unlist(newlesion_mw[3,]), method = 'hochberg'))
newlesion_mw = rbind(newlesion_mw, no_new_lesion =colMeans(total_noSeg[total_noSeg$recoddiseas2==0, 6:45], na.rm = T) )
newlesion_mw = rbind(newlesion_mw, yes_new_lesion =colMeans(total_noSeg[total_noSeg$recoddiseas2==1, 6:45], na.rm = T) )
kable(newlesion_mw)
| statistic |
152 |
171 |
105 |
121 |
124 |
154 |
158 |
158 |
136 |
144 |
105 |
131 |
166 |
155 |
156 |
189 |
171 |
130 |
133 |
125 |
112 |
156 |
186 |
158 |
129 |
138 |
151 |
147 |
188 |
184 |
109 |
166 |
159 |
120 |
213 |
94 |
123 |
172 |
152 |
147 |
| parameter |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
| p.value |
0.7927673 |
0.7690258 |
0.08216865 |
0.219181 |
0.2566871 |
0.8407945 |
0.9384257 |
0.9384257 |
0.4478842 |
0.6097609 |
0.08216865 |
0.3602474 |
0.8894059 |
0.865038 |
0.8894059 |
0.3939649 |
0.7690258 |
0.3440713 |
0.3939649 |
0.2700989 |
0.1300296 |
0.8894059 |
0.4478842 |
0.9384257 |
0.328354 |
0.4859826 |
0.7690258 |
0.6763469 |
0.4114979 |
0.4859826 |
0.1074387 |
0.8894059 |
0.9630324 |
0.2075762 |
0.1074387 |
0.03609037 |
0.2437323 |
0.7454936 |
0.7927673 |
0.6763469 |
| null.value |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| alternative |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
| method |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
| data.name |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
x by data$newlesion |
| adjusted_p.value_fdr |
0.9624879 |
0.9624879 |
0.8595095 |
0.9256812 |
0.9256812 |
0.9624879 |
0.9624879 |
0.9624879 |
0.9256812 |
0.9624879 |
0.8595095 |
0.9256812 |
0.9624879 |
0.9624879 |
0.9624879 |
0.9256812 |
0.9624879 |
0.9256812 |
0.9256812 |
0.9256812 |
0.8668638 |
0.9624879 |
0.9256812 |
0.9624879 |
0.9256812 |
0.9256812 |
0.9624879 |
0.9624879 |
0.9256812 |
0.9256812 |
0.8595095 |
0.9624879 |
0.9630324 |
0.9256812 |
0.8595095 |
0.8595095 |
0.9256812 |
0.9624879 |
0.9624879 |
0.9624879 |
| adjusted_p.value_hochberg |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
0.9630324 |
| no_new_lesion |
3.124116 |
16.89225 |
35.607 |
60.76664 |
499.0092 |
433.6578 |
91.01064 |
50.49853 |
21.41336 |
276.4907 |
76.72541 |
45.13108 |
106.5219 |
160.3457 |
24.21595 |
30.74202 |
34.33406 |
82.61671 |
71.83639 |
60.22598 |
156.3828 |
111.1731 |
0.8031685 |
4.699328 |
4.57674 |
0.5917922 |
3.650282 |
0.688878 |
0.5786692 |
0.1591727 |
1.195207 |
23.70645 |
14.02739 |
2.731364 |
0.6419407 |
2.311366 |
0.296098 |
1.388073 |
0.386422 |
0.5218794 |
| yes_new_lesion |
4.035562 |
25.69114 |
33.08678 |
60.72593 |
432.7207 |
360.8517 |
81.94685 |
43.29987 |
22.84186 |
301.425 |
52.30318 |
55.12769 |
224.6137 |
161.8015 |
22.13957 |
35.06458 |
36.90545 |
82.90511 |
71.732 |
39.1252 |
106.0426 |
157.5622 |
0.6792645 |
4.644915 |
4.259247 |
0.5233265 |
2.820476 |
0.6589611 |
0.4056015 |
0.1441502 |
1.055709 |
22.14809 |
13.97757 |
2.042163 |
0.6298461 |
1.452841 |
0.1799292 |
1.51121 |
0.403157 |
0.3324435 |
# newlesion_ks = as.data.frame(sapply(data[, 6:45], function(x)
# ks.test(x~data$newlesion)))
Mann-Whitney & Kolmogrov-Smirnov test on all columns vs recodnonseg2
data = total_noSeg %>% filter(!is.na(recodnonseg2))
recodnonseg2_mw = as.data.frame(sapply(data[, c(6:45)], function(x)
wilcox.test(x~data$recodnonseg2)))
recodnonseg2_mw = rbind(recodnonseg2_mw , adjusted_p.value_fdr = p.adjust(unlist(recodnonseg2_mw[3,]), method = 'fdr'))
recodnonseg2_mw = rbind(recodnonseg2_mw , adjusted_p.value_hochberg = p.adjust(unlist(recodnonseg2_mw[3,]), method = 'hochberg'))
recodnonseg2_mw = rbind(recodnonseg2_mw, khafif_mean =colMeans(total_noSeg[total_noSeg$recodnonseg2==0, 6:45], na.rm = T) )
recodnonseg2_mw = rbind(recodnonseg2_mw, shadid_mean =colMeans(total_noSeg[total_noSeg$recodnonseg2==1, 6:45], na.rm = T) )
kable(recodnonseg2_mw)
| statistic |
225 |
264 |
152 |
139 |
202 |
180 |
171 |
154 |
237 |
170 |
208 |
239 |
162 |
223 |
223 |
194 |
215 |
237 |
257 |
207 |
203 |
187 |
164 |
177 |
213 |
232 |
207 |
174 |
163 |
145 |
229 |
153 |
173 |
212 |
114 |
164 |
195 |
139 |
184 |
198 |
| parameter |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
NULL |
| p.value |
0.1045406 |
0.004033456 |
0.5782569 |
0.3426794 |
0.3583325 |
0.7985884 |
1 |
0.6201035 |
0.04563677 |
0.9880272 |
0.2709349 |
0.03920604 |
0.7985884 |
0.1184626 |
0.1184626 |
0.4986944 |
0.1885597 |
0.04563677 |
0.008219927 |
0.284413 |
0.3426794 |
0.6414975 |
0.8453054 |
0.8688752 |
0.2099941 |
0.06555546 |
0.284413 |
0.9401884 |
0.8218698 |
0.442985 |
0.08052259 |
0.5990186 |
0.964092 |
0.2213329 |
0.08607255 |
0.8453054 |
0.4797312 |
0.3426794 |
0.7073835 |
0.4252178 |
| null.value |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| alternative |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
two.sided |
| method |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
Wilcoxon rank sum test |
| data.name |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
x by data$recodnonseg2 |
| adjusted_p.value_fdr |
0.4307732 |
0.1613383 |
0.8848242 |
0.682538 |
0.682538 |
0.9654168 |
1 |
0.8848242 |
0.3650942 |
1 |
0.6692071 |
0.3650942 |
0.9654168 |
0.4307732 |
0.4307732 |
0.7979111 |
0.6285324 |
0.3650942 |
0.1643985 |
0.6692071 |
0.682538 |
0.8848242 |
0.9654168 |
0.9654168 |
0.6323797 |
0.4303628 |
0.6692071 |
1 |
0.9654168 |
0.7704087 |
0.4303628 |
0.8848242 |
1 |
0.6323797 |
0.4303628 |
0.9654168 |
0.7979111 |
0.682538 |
0.943178 |
0.7704087 |
| adjusted_p.value_hochberg |
1 |
0.1613383 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
0.3205772 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
1 |
| khafif_mean |
4.351811 |
31.83114 |
33.82565 |
62.58102 |
452.0396 |
368.7695 |
90.92358 |
41.08363 |
25.23839 |
316.9333 |
55.10125 |
62.89814 |
241.4277 |
191.2362 |
24.36319 |
38.52066 |
41.42105 |
98.41708 |
83.019 |
42.34168 |
112.1734 |
155.1452 |
0.6725561 |
4.747437 |
4.472902 |
0.6002664 |
2.930733 |
0.6390026 |
0.3970949 |
0.1435599 |
1.164709 |
20.81183 |
13.96961 |
2.114185 |
0.5712807 |
1.376055 |
0.1845871 |
1.45169 |
0.3845713 |
0.32802 |
| shadid_mean |
3.737478 |
19.24975 |
35.29254 |
67.79022 |
440.1453 |
325.2838 |
89.87248 |
41.41137 |
21.7162 |
317.3907 |
50.35759 |
55.41536 |
239.0795 |
168.2889 |
21.91696 |
36.63578 |
36.9358 |
79.82987 |
68.52315 |
35.84384 |
97.7258 |
152.715 |
0.6784302 |
4.925123 |
4.08827 |
0.4900274 |
2.703046 |
0.6361233 |
0.4069953 |
0.1473239 |
0.9177522 |
20.96172 |
13.38492 |
1.618508 |
0.6730123 |
1.399314 |
0.1683273 |
1.481802 |
0.3600984 |
0.3216298 |
# newlesion_ks = as.data.frame(sapply(data[, 6:45], function(x)
# ks.test(x~data$newlesion)))
heatmap & dendrograms with health2 as labels
heatmap(as.matrix(total_noSeg[, c(6:27)]), scale="column", labRow = total_noSeg$health2)

heatmap & dendrograms with newlesion as labels
heatmap(as.matrix(total_noSeg[, c(6:27)]), scale="column", labRow = total_noSeg$newlesion)

heatmap & dendrograms with familyHistory as labels
heatmap(as.matrix(total_noSeg[, c(6:27)], scale="column", labRow = total_noSeg$familyhistory))

heatmap & dendrograms with nonsegmentale as labels
heatmap(as.matrix(total_noSeg[, c(6:27)], scale="column", labRow = total_noSeg$nonsegmentale))

heatmap & dendrograms with recoddiseas2 as labels
heatmap(as.matrix(total_noSeg[, c(6:27)], scale="column", labRow = total_noSeg$recoddiseas2))

random forest(feature selection)
data = total_noSeg[, c(4,6:27)]
data$health2 = as.factor(data$health2)
output.forest <- randomForest(health2 ~ .,
data )
print(output.forest)
##
## Call:
## randomForest(formula = health2 ~ ., data = data)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 9.86%
## Confusion matrix:
## Healthy ILL class.error
## Healthy 34 0 0.0000000
## ILL 7 30 0.1891892
print(output.forest$confusion)
## Healthy ILL class.error
## Healthy 34 0 0.0000000
## ILL 7 30 0.1891892
importance =data.frame(name = row.names(output.forest$importance),MeanDecreaseGini = output.forest$importance)
knitr::kable(arrange(importance, desc(MeanDecreaseGini)))
| lysine |
4.4508826 |
| argenine |
4.1735678 |
| cysteine |
4.0230278 |
| ornitine |
3.0911975 |
| tyrosine |
2.9597111 |
| proline |
1.8405242 |
| aspartate |
1.6779623 |
| tryptophane |
1.5600716 |
| glutamate |
1.4869353 |
| histithine |
1.2569367 |
| glutamine |
1.0023942 |
| methionine |
0.9501119 |
| glycine |
0.8812520 |
| asparagine |
0.7936855 |
| phenylalanine |
0.7179538 |
| serine |
0.7052845 |
| valine |
0.6020607 |
| alanine |
0.6000511 |
| isoleucine |
0.5802141 |
| theronine |
0.5590213 |
| citrolline |
0.5186358 |
| leucine |
0.5049686 |
plot Random Forest Model
plot(output.forest)

heatmap with most important features(no proportions)
most_important_features = importance %>% filter(MeanDecreaseGini>1)
data= total_noSeg[ , which(names(total_noSeg) %in% most_important_features$name)]
heatmap(as.matrix(data),scale="column", labRow = total_noSeg$health2)

boxplots for most important features
for (i in 1:ncol(data)){
print(i)
name = colnames(data)[i]
plot = ggplot(data, aes(x = total_noSeg$health2, y = data[, i])) +
geom_boxplot()+ ylab(name)+ xlab('health')
print(plot)
}
## [1] 1

## [1] 2

## [1] 3

## [1] 4

## [1] 5

## [1] 6

## [1] 7

## [1] 8

## [1] 9

## [1] 10

## [1] 11

random forest with proportions(feature selection)
data = total_noSeg[, c(4,6:45)]
data$health2 = as.factor(data$health2)
output.forest_prop <- randomForest(health2 ~ .,
data )
print(output.forest_prop)
##
## Call:
## randomForest(formula = health2 ~ ., data = data)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 6
##
## OOB estimate of error rate: 12.68%
## Confusion matrix:
## Healthy ILL class.error
## Healthy 31 3 0.08823529
## ILL 6 31 0.16216216
print(output.forest_prop$confusion)
## Healthy ILL class.error
## Healthy 31 3 0.08823529
## ILL 6 31 0.16216216
importance_prop =data.frame(name = row.names(output.forest_prop$importance),MeanDecreaseGini = output.forest_prop$importance)
importance_prop = arrange(importance_prop, desc(MeanDecreaseGini))
knitr::kable(importance_prop)
| argtophe |
4.2677499 |
| lysine |
3.2539367 |
| cysteine |
3.0031481 |
| argenine |
2.2609431 |
| argtoala |
2.0338725 |
| mettotyr |
1.8357484 |
| orntocit |
1.7089554 |
| ornitine |
1.6020697 |
| Xletotyr |
1.0910564 |
| tyrosine |
1.0520867 |
| proline |
1.0296687 |
| tryptophane |
0.9557608 |
| aspartate |
0.9556368 |
| glutamate |
0.8260966 |
| Xletophe |
0.7189635 |
| tyrtocys |
0.7040800 |
| histithine |
0.6904941 |
| argtoorn |
0.5951173 |
| mettocit |
0.4921165 |
| glutamine |
0.4588401 |
| glutocit |
0.3578793 |
| XletoAla |
0.3409359 |
| methionine |
0.3345465 |
| leucine |
0.3299992 |
| phetotyr |
0.3240922 |
| asparagine |
0.3167259 |
| valine |
0.3112363 |
| glycine |
0.2853957 |
| cittophe |
0.2826849 |
| alanine |
0.2813152 |
| mettoxle |
0.2809446 |
| cittoarg |
0.2773493 |
| valtophe |
0.2732817 |
| serine |
0.2604170 |
| citrolline |
0.2462431 |
| isoleucine |
0.2085660 |
| alatocit |
0.2016680 |
| theronine |
0.1935923 |
| phenylalanine |
0.1854059 |
| mettophe |
0.1554923 |
plot Random Forest Model (with proportions)
plot(output.forest_prop)

heatmap with most important features(no proportions)
most_important_features = importance_prop %>% filter(MeanDecreaseGini>1)
data= total_noSeg[ , which(names(total_noSeg) %in% most_important_features$name)]
heatmap(as.matrix(data),scale="column", labRow = total_noSeg$health2)

boxplots for most important features
for (i in 1:ncol(data)){
print(i)
name = colnames(data)[i]
plot = ggplot(data, aes(x = total_noSeg$health2, y = data[, i])) +
geom_boxplot()+ ylab(name)+ xlab('health')
print(plot)
}
## [1] 1

## [1] 2

## [1] 3

## [1] 4

## [1] 5

## [1] 6

## [1] 7

## [1] 8

## [1] 9

## [1] 10

## [1] 11

heatmap with most important features(proportions are included)
most_important_features = importance_prop %>% filter(MeanDecreaseGini>1)
data= total_noSeg[ , which(names(total_noSeg) %in% most_important_features$name)]
heatmap(as.matrix(data),scale="column", labRow = total_noSeg$health2)

predict with glm
data$health2 = total_noSeg$health2
data$health2 [data$health2=='ILL']= 1
data$health2 [data$health2=='Healthy']= 0
data$health2= as.numeric(data$health2)
model = glm( data=data, health2~., binomial(link='logit'))
print(summary(model))
##
## Call:
## glm(formula = health2 ~ ., family = binomial(link = "logit"),
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.21439 -0.22762 0.01774 0.11962 2.46490
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.437944 8.621792 1.211 0.2260
## argenine -0.078095 0.073915 -1.057 0.2907
## tyrosine 0.047833 0.103108 0.464 0.6427
## cysteine 0.018071 0.010326 1.750 0.0801 .
## ornitine -0.019864 0.048774 -0.407 0.6838
## lysine -0.018213 0.024747 -0.736 0.4617
## proline -0.004119 0.012225 -0.337 0.7361
## Xletotyr -1.022730 1.294238 -0.790 0.4294
## mettotyr -0.816744 8.157086 -0.100 0.9202
## orntocit -0.623229 0.913357 -0.682 0.4950
## argtophe -1.957933 2.146485 -0.912 0.3617
## argtoala 8.712359 10.951594 0.796 0.4263
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 98.300 on 70 degrees of freedom
## Residual deviance: 23.613 on 59 degrees of freedom
## AIC: 47.613
##
## Number of Fisher Scoring iterations: 7
predict_data = data.frame(true=data$health2 ,predicted = ifelse(fitted(model) > 0.5, '1', '0'), m = fitted(model))
predict_data$res = predict_data$predicted==predict_data$true
(sum(predict_data$res)/ nrow(predict_data))*100
## [1] 94.3662
RocPlot for glm model
basicplot <- ggplot(predict_data, aes(d = true, m = m)) + geom_roc(n.cuts = 0, labels = T)
basicplot+ annotate("text", x = .75, y = .25, label = paste("AUC =", round(calc_auc(basicplot)$AUC, 2)))

pairwise scatter plot for most important features
most_important_features = importance_prop %>% filter(MeanDecreaseGini>1)
data= total_noSeg[ , which(names(total_noSeg) %in% most_important_features$name)]
data$health2 = total_noSeg$health2
ggpairs(data=data, # data.frame with variables
mapping = ggplot2::aes(colour = as.factor(health2)),
lower = list(continuous = wrap("smooth", method = "lm")),
columns=1:ncol(data), # columns to plot, default to all.
title=("Vitiligo data") # title of the plot
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

RandomForest for nonsegmentale (Feature Selection)
data = total_noSeg[, c(6:45)]
data$nonsegmentale = as.factor(total_noSeg$nonsegmentale)
output.forest_nonseg <- randomForest(nonsegmentale ~ ., data= data )
print(output.forest_nonseg)
##
## Call:
## randomForest(formula = nonsegmentale ~ ., data = data)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 6
##
## OOB estimate of error rate: 28.17%
## Confusion matrix:
## 0 1 2 class.error
## 0 34 0 0 0.0000000
## 1 5 6 8 0.6842105
## 2 3 4 11 0.3888889
print(output.forest_nonseg$confusion)
## 0 1 2 class.error
## 0 34 0 0 0.0000000
## 1 5 6 8 0.6842105
## 2 3 4 11 0.3888889
importance_nonSeg =data.frame(name = row.names(output.forest_nonseg$importance),MeanDecreaseGini = output.forest_nonseg$importance)
importance_nonSeg = arrange(importance_nonSeg, desc(MeanDecreaseGini))
knitr::kable(importance_nonSeg)
| argtophe |
3.6202689 |
| lysine |
3.4626188 |
| glutamate |
2.5802438 |
| cysteine |
2.5042770 |
| argenine |
2.0677510 |
| orntocit |
1.9359773 |
| mettotyr |
1.7553453 |
| ornitine |
1.7343827 |
| argtoala |
1.6192344 |
| Xletotyr |
1.3525380 |
| leucine |
1.2547284 |
| aspartate |
1.2426284 |
| tyrtocys |
1.1649300 |
| tyrosine |
1.1017939 |
| Xletophe |
1.0276210 |
| proline |
1.0252972 |
| mettocit |
0.9864488 |
| histithine |
0.9040171 |
| tryptophane |
0.8891480 |
| asparagine |
0.8743952 |
| argtoorn |
0.8494314 |
| XletoAla |
0.8211537 |
| citrolline |
0.7043045 |
| isoleucine |
0.6804186 |
| methionine |
0.6482214 |
| valine |
0.6096959 |
| phetotyr |
0.5938201 |
| glutamine |
0.5904713 |
| cittoarg |
0.5871350 |
| phenylalanine |
0.5702291 |
| serine |
0.5649915 |
| valtophe |
0.5509432 |
| glutocit |
0.5119671 |
| theronine |
0.4752374 |
| glycine |
0.4605956 |
| cittophe |
0.4458480 |
| mettophe |
0.4432158 |
| mettoxle |
0.4315571 |
| alatocit |
0.3778848 |
| alanine |
0.3387265 |
plot random forest model for nonsegmentale
plot(output.forest_nonseg)

heatmap with most important features for nonsegmentale(proportions are included)
most_important_features = importance_nonSeg %>% filter(MeanDecreaseGini>1)
data= total_noSeg[ , which(names(total_noSeg) %in% most_important_features$name)]
heatmap(as.matrix(data),scale="column", labRow = total_noSeg$nonsegmentale)

boxplots for most important features
for (i in 1:ncol(data)){
print(i)
name = colnames(data)[i]
plot = ggplot(data, aes(x = as.factor(total_noSeg$nonsegmentale), y = data[, i])) +
geom_boxplot()+ ylab(name)+ xlab('nonsegmentale')
print(plot)
}
## [1] 1

## [1] 2

## [1] 3

## [1] 4

## [1] 5

## [1] 6

## [1] 7

## [1] 8

## [1] 9

## [1] 10

## [1] 11

## [1] 12

## [1] 13

## [1] 14

## [1] 15

## [1] 16

RandomForest for recodnonseg2 (Feature Selection)
data = total_noSeg[!is.na(total_noSeg$recodnonseg2), c(6:45)]
data$recodnonseg2 = as.factor(total_noSeg[!is.na(total_noSeg$recodnonseg2), ]$recodnonseg2)
output.forest_nonseg2 <- randomForest(recodnonseg2 ~ ., data= data )
print(output.forest_nonseg2)
##
## Call:
## randomForest(formula = recodnonseg2 ~ ., data = data)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 6
##
## OOB estimate of error rate: 40.54%
## Confusion matrix:
## 0 1 class.error
## 0 9 10 0.5263158
## 1 5 13 0.2777778
print(output.forest_nonseg2$confusion)
## 0 1 class.error
## 0 9 10 0.5263158
## 1 5 13 0.2777778
importance_nonSeg2 =data.frame(name = row.names(output.forest_nonseg2$importance),MeanDecreaseGini = output.forest_nonseg2$importance)
importance_nonSeg2 = arrange(importance_nonSeg2, desc(MeanDecreaseGini))
knitr::kable(importance_nonSeg2)
| glutamate |
1.3771335 |
| leucine |
1.0178354 |
| tyrosine |
0.8484063 |
| asparagine |
0.7192235 |
| mettocit |
0.6620016 |
| citrolline |
0.6595340 |
| glutamine |
0.5745504 |
| lysine |
0.5729731 |
| cittophe |
0.5381576 |
| Xletotyr |
0.5116598 |
| XletoAla |
0.5056345 |
| methionine |
0.4855840 |
| orntocit |
0.4773288 |
| mettotyr |
0.4651458 |
| serine |
0.4605557 |
| isoleucine |
0.4260063 |
| glutocit |
0.4060991 |
| argenine |
0.3936690 |
| phenylalanine |
0.3893310 |
| aspartate |
0.3767340 |
| argtophe |
0.3764566 |
| argtoala |
0.3727233 |
| argtoorn |
0.3607005 |
| proline |
0.3494384 |
| valine |
0.3366872 |
| mettoxle |
0.3343400 |
| alanine |
0.3340815 |
| ornitine |
0.3231181 |
| histithine |
0.3204424 |
| Xletophe |
0.3077756 |
| theronine |
0.3023159 |
| valtophe |
0.3022618 |
| cittoarg |
0.2876059 |
| tryptophane |
0.2801585 |
| phetotyr |
0.2754304 |
| tyrtocys |
0.2602275 |
| mettophe |
0.2601541 |
| glycine |
0.2575445 |
| alatocit |
0.2254761 |
| cysteine |
0.2218226 |
predict with glm: recodnonseg2
most_important_features = importance_nonSeg2 %>% filter(MeanDecreaseGini>0.8)
data= total_noSeg[ , which(names(total_noSeg) %in% most_important_features$name)]
data$recodnonseg2 = as.numeric(total_noSeg$recodnonseg2)
#when we add the below factors, AUC changes from 0.79 to 0.89
data$familyhistory = as.factor(total_noSeg$familyhistory)
data$recoddiseas2 = as.factor(total_noSeg$recoddiseas2)
data$newlesion = as.factor(total_noSeg$newlesion)
data = na.omit(data)
model = glm( data=data, recodnonseg2~., family = binomial(link = 'logit'))
print(summary(model))
##
## Call:
## glm(formula = recodnonseg2 ~ ., family = binomial(link = "logit"),
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3174 -0.7784 -0.0556 0.5432 2.1714
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.31156 2.21217 1.949 0.0513 .
## glutamate -0.12552 0.05747 -2.184 0.0290 *
## tyrosine 0.02115 0.04699 0.450 0.6526
## leucine -0.04690 0.04031 -1.163 0.2447
## familyhistory1 1.99901 1.16842 1.711 0.0871 .
## recoddiseas22 1.57539 1.13865 1.384 0.1665
## newlesion1 -0.92081 1.02231 -0.901 0.3677
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 51.266 on 36 degrees of freedom
## Residual deviance: 30.808 on 30 degrees of freedom
## AIC: 44.808
##
## Number of Fisher Scoring iterations: 5
predict_data = data.frame(true=total_noSeg[!is.na(total_noSeg$recodnonseg2), ]$recodnonseg2 ,predicted = ifelse(fitted(model) > 0.5, '1', '0'), m = fitted(model))
predict_data$res = predict_data$predicted==predict_data$true
(sum(predict_data$res)/ nrow(predict_data))*100
## [1] 78.37838
RocPlot for glm model
basicplot <- ggplot(predict_data, aes(d = true, m = m)) + geom_roc(n.cuts = 0, labels = T)
basicplot+ annotate("text", x = .75, y = .25, label = paste("AUC =", round(calc_auc(basicplot)$AUC, 2)))
